import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
pd.set_option('display.max_columns', None)
dt=pd.read_csv("dataset.csv")
dt.head()
| VIN (1-10) | County | City | State | Postal Code | Model Year | Make | Model | Electric Vehicle Type | Clean Alternative Fuel Vehicle (CAFV) Eligibility | Electric Range | Base MSRP | Legislative District | DOL Vehicle ID | Vehicle Location | Electric Utility | 2020 Census Tract | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | JTMEB3FV6N | Monroe | Key West | FL | 33040 | 2022 | TOYOTA | RAV4 PRIME | Plug-in Hybrid Electric Vehicle (PHEV) | Clean Alternative Fuel Vehicle Eligible | 42 | 0 | NaN | 198968248 | POINT (-81.80023 24.5545) | NaN | 12087972100 |
| 1 | 1G1RD6E45D | Clark | Laughlin | NV | 89029 | 2013 | CHEVROLET | VOLT | Plug-in Hybrid Electric Vehicle (PHEV) | Clean Alternative Fuel Vehicle Eligible | 38 | 0 | NaN | 5204412 | POINT (-114.57245 35.16815) | NaN | 32003005702 |
| 2 | JN1AZ0CP8B | Yakima | Yakima | WA | 98901 | 2011 | NISSAN | LEAF | Battery Electric Vehicle (BEV) | Clean Alternative Fuel Vehicle Eligible | 73 | 0 | 15.0 | 218972519 | POINT (-120.50721 46.60448) | PACIFICORP | 53077001602 |
| 3 | 1G1FW6S08H | Skagit | Concrete | WA | 98237 | 2017 | CHEVROLET | BOLT EV | Battery Electric Vehicle (BEV) | Clean Alternative Fuel Vehicle Eligible | 238 | 0 | 39.0 | 186750406 | POINT (-121.7515 48.53892) | PUGET SOUND ENERGY INC | 53057951101 |
| 4 | 3FA6P0SU1K | Snohomish | Everett | WA | 98201 | 2019 | FORD | FUSION | Plug-in Hybrid Electric Vehicle (PHEV) | Not eligible due to low battery range | 26 | 0 | 38.0 | 2006714 | POINT (-122.20596 47.97659) | PUGET SOUND ENERGY INC | 53061041500 |
dt.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 112634 entries, 0 to 112633 Data columns (total 17 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 VIN (1-10) 112634 non-null object 1 County 112634 non-null object 2 City 112634 non-null object 3 State 112634 non-null object 4 Postal Code 112634 non-null int64 5 Model Year 112634 non-null int64 6 Make 112634 non-null object 7 Model 112614 non-null object 8 Electric Vehicle Type 112634 non-null object 9 Clean Alternative Fuel Vehicle (CAFV) Eligibility 112634 non-null object 10 Electric Range 112634 non-null int64 11 Base MSRP 112634 non-null int64 12 Legislative District 112348 non-null float64 13 DOL Vehicle ID 112634 non-null int64 14 Vehicle Location 112610 non-null object 15 Electric Utility 112191 non-null object 16 2020 Census Tract 112634 non-null int64 dtypes: float64(1), int64(6), object(10) memory usage: 14.6+ MB
dt.isna().sum()
VIN (1-10) 0 County 0 City 0 State 0 Postal Code 0 Model Year 0 Make 0 Model 20 Electric Vehicle Type 0 Clean Alternative Fuel Vehicle (CAFV) Eligibility 0 Electric Range 0 Base MSRP 0 Legislative District 286 DOL Vehicle ID 0 Vehicle Location 24 Electric Utility 443 2020 Census Tract 0 dtype: int64
dt.dropna(inplace=True)
dt.duplicated().sum()
0
dt.shape
(112152, 17)
dt.select_dtypes('number').columns
Index(['Postal Code', 'Model Year', 'Electric Range', 'Base MSRP',
'Legislative District', 'DOL Vehicle ID', '2020 Census Tract'],
dtype='object')
dt['Model Year'].value_counts()
2022 26455 2021 18277 2018 14190 2020 10998 2019 10216 2017 8598 2016 5709 2015 4918 2013 4669 2014 3665 2023 1863 2012 1695 2011 835 2010 24 2008 23 2000 10 1999 3 2002 2 1997 1 1998 1 Name: Model Year, dtype: int64
fig=px.histogram(dt,x='Model Year')
fig.update_traces(marker=dict(color='orange', line=dict(color='black', width=2)))
fig.show()
Between 2020 to 2022 more numbers of models are manufactured
fig=px.histogram(dt,x='Postal Code')
fig.update_traces(marker=dict(color='Green', line=dict(color='black', width=2)))
fig.show()
filt = dt['County'].value_counts().reset_index(name='Count').head(10)
fig = go.Figure()
fig.add_trace(go.Scatter(x=filt['index'], y=filt['Count'],mode='lines+markers',
marker=dict(size=10,),
line=dict(color='blue', width=2),
showlegend=False))
fig.update_layout(title='Car Make Frequency',
xaxis_title='Car Make', yaxis_title='Count')
fig.show()
dt.select_dtypes(include='object').columns
Index(['VIN (1-10)', 'County', 'City', 'State', 'Make', 'Model',
'Electric Vehicle Type',
'Clean Alternative Fuel Vehicle (CAFV) Eligibility', 'Vehicle Location',
'Electric Utility'],
dtype='object')
filt = dt['Make'].value_counts().head(15)
fig = px.bar(filt, y=filt.index, x=filt.values,color=filt.index,title="Frequency of Car Makes")
fig.update_layout(xaxis_title='Car Make', yaxis_title='Count',xaxis_tickangle=0)
fig.show()
filt=dt['County'].value_counts().head(10)
fig=px.bar(filt,y='County',color=filt.index)
fig.update_layout(xaxis_title='County', yaxis_title='count',title='Frequency of County')
fig.show()
filt = dt.groupby(['County', 'City']).size().reset_index(name='Count')
fig = px.treemap(
filt,
path=['County', 'City'],
values='Count',
title="Car Counts by County and City",
color='Count',
color_continuous_scale='Blues')
fig.show()
/Users/koushik/opt/anaconda3/lib/python3.9/site-packages/plotly/express/_core.py:1637: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. /Users/koushik/opt/anaconda3/lib/python3.9/site-packages/plotly/express/_core.py:1637: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
filt=dt['Electric Vehicle Type'].value_counts().reset_index(name='Count')
fig=px.pie(filt,names='index',values='Count',hole=0.4)
fig.update_layout(title="Electric Vehicle Type")
fig.show()
filt=dt['City'].value_counts().reset_index(name='Count').head(15)
fig=px.pie(filt,names='index',values='Count',color_discrete_sequence=px.colors.qualitative.Plotly)
fig.update_layout(title="Types of City")
fig.show()
filt = dt['Model Year'].value_counts().reset_index(name='Count').head(10)
fig = go.Figure()
fig.add_trace(go.Scatter(x=filt['index'], y=filt['Count'],mode='lines+markers',
marker=dict(size=10,),
line=dict(color='blue', width=2),
showlegend=False))
fig.update_layout(title='Model Year',xaxis_title='Model Year', yaxis_title='Count')
fig.show()
dt.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 112152 entries, 2 to 112633 Data columns (total 17 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 VIN (1-10) 112152 non-null object 1 County 112152 non-null object 2 City 112152 non-null object 3 State 112152 non-null object 4 Postal Code 112152 non-null int64 5 Model Year 112152 non-null int64 6 Make 112152 non-null object 7 Model 112152 non-null object 8 Electric Vehicle Type 112152 non-null object 9 Clean Alternative Fuel Vehicle (CAFV) Eligibility 112152 non-null object 10 Electric Range 112152 non-null int64 11 Base MSRP 112152 non-null int64 12 Legislative District 112152 non-null float64 13 DOL Vehicle ID 112152 non-null int64 14 Vehicle Location 112152 non-null object 15 Electric Utility 112152 non-null object 16 2020 Census Tract 112152 non-null int64 dtypes: float64(1), int64(6), object(10) memory usage: 15.4+ MB
c = dt.groupby(['Model Year', 'Make']).size().reset_index(name='Count')
top_c = c.sort_values(by='Count', ascending=False).head(50)
pivot_data = top_c.pivot(index='Model Year', columns='Make', values='Count').fillna(0)
fig = go.Figure()
for make in pivot_data.columns:
fig.add_trace(go.Scatter(x=pivot_data.index, y=pivot_data[make], mode='lines+markers',
name=make, marker=dict(size=8), line=dict(width=2)))
fig.update_layout(title='Top Car Makes by Model Year',xaxis_title='Model Year',yaxis_title='Count',hovermode='x')
fig.show()
fig_box_ev_type_range = px.box(dt, x='Electric Vehicle Type', y='Electric Range')
fig_box_ev_type_range.show()
df_counts = dt.groupby(['Model Year', 'Make']).size().reset_index(name='Count').tail(100)
fig=px.bar(df_counts, x='Model Year', y='Count', color='Make',
title='Count of Different Electric Vehicles by Model Year')
fig.show()
fig = px.scatter(dt, x='Model Year', y='Electric Range',color='Clean Alternative Fuel Vehicle (CAFV) Eligibility',title='Electric Range by Car Make',
labels={'Electric Range': 'Electric Range', 'Make': 'Car Make'},
hover_data=['Make', 'Electric Range'])
fig.show()
avg = dt.groupby('Make')['Electric Range'].mean().reset_index()
fig = go.Figure()
fig.add_trace(go.Scatter(x=avg['Make'], y=avg['Electric Range'],
mode='lines+markers',
marker=dict(size=10, color='blue'),
line=dict(color='blue', width=2),
name='Electric Range'))
fig.update_layout(title='Electric Range by Car Make',
xaxis_title='Car Make',
yaxis_title='Electric Range',
yaxis=dict(range=[0, max(avg['Electric Range']) + 50]),
hovermode='closest')
fig.show()
dt.head()
| VIN (1-10) | County | City | State | Postal Code | Model Year | Make | Model | Electric Vehicle Type | Clean Alternative Fuel Vehicle (CAFV) Eligibility | Electric Range | Base MSRP | Legislative District | DOL Vehicle ID | Vehicle Location | Electric Utility | 2020 Census Tract | Longitude | Latitude | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | JN1AZ0CP8B | Yakima | Yakima | WA | 98901 | 2011 | NISSAN | LEAF | Battery Electric Vehicle (BEV) | Clean Alternative Fuel Vehicle Eligible | 73 | 0 | 15.0 | 218972519 | POINT (-120.50721 46.60448) | PACIFICORP | 53077001602 | -120.50721 | 46.60448 |
| 3 | 1G1FW6S08H | Skagit | Concrete | WA | 98237 | 2017 | CHEVROLET | BOLT EV | Battery Electric Vehicle (BEV) | Clean Alternative Fuel Vehicle Eligible | 238 | 0 | 39.0 | 186750406 | POINT (-121.7515 48.53892) | PUGET SOUND ENERGY INC | 53057951101 | -121.75150 | 48.53892 |
| 4 | 3FA6P0SU1K | Snohomish | Everett | WA | 98201 | 2019 | FORD | FUSION | Plug-in Hybrid Electric Vehicle (PHEV) | Not eligible due to low battery range | 26 | 0 | 38.0 | 2006714 | POINT (-122.20596 47.97659) | PUGET SOUND ENERGY INC | 53061041500 | -122.20596 | 47.97659 |
| 5 | 5YJ3E1EB5J | Snohomish | Bothell | WA | 98021 | 2018 | TESLA | MODEL 3 | Battery Electric Vehicle (BEV) | Clean Alternative Fuel Vehicle Eligible | 215 | 0 | 1.0 | 475635324 | POINT (-122.18384 47.8031) | PUGET SOUND ENERGY INC | 53061051916 | -122.18384 | 47.80310 |
| 6 | 1N4AZ0CP4D | Snohomish | Everett | WA | 98203 | 2013 | NISSAN | LEAF | Battery Electric Vehicle (BEV) | Clean Alternative Fuel Vehicle Eligible | 75 | 0 | 38.0 | 253546023 | POINT (-122.23019 47.94949) | PUGET SOUND ENERGY INC | 53061040900 | -122.23019 | 47.94949 |
fig_density = px.density_contour(
dt,
x='Base MSRP',
y='Electric Range',
color='Clean Alternative Fuel Vehicle (CAFV) Eligibility',
title='2D Density Plot: Electric Range vs. Base MSRP',
labels={'Base MSRP': 'Base MSRP ($)', 'Electric Range': 'Electric Range (miles)'},
height=600,
)
fig_density.update_layout(
xaxis_title='Base MSRP ($)',
yaxis_title='Electric Range (miles)',
)
fig_density.show()
c=dt.groupby('Make')['Model'].value_counts().to_frame()
c.head(30)
| Model | ||
|---|---|---|
| Make | Model | |
| AUDI | E-TRON | 795 |
| A3 | 572 | |
| Q5 E | 478 | |
| E-TRON SPORTBACK | 171 | |
| Q5 | 137 | |
| E-TRON GT | 75 | |
| Q4 | 62 | |
| RS E-TRON GT | 16 | |
| A7 | 11 | |
| A8 E | 3 | |
| AZURE DYNAMICS | TRANSIT CONNECT ELECTRIC | 7 |
| BENTLEY | BENTAYGA | 2 |
| FLYING SPUR | 1 | |
| BMW | I3 | 1888 |
| X5 | 1402 | |
| 530E | 323 | |
| 330E | 299 | |
| X3 | 292 | |
| IX | 162 | |
| I4 | 154 | |
| I8 | 102 | |
| 740E | 29 | |
| 745E | 7 | |
| 745LE | 2 | |
| CADILLAC | ELR | 76 |
| CT6 | 32 | |
| CHEVROLET | BOLT EV | 4895 |
| VOLT | 4870 | |
| SPARK | 248 | |
| BOLT EUV | 126 |
c = dt.groupby('Make')['Model'].value_counts().to_frame(name='Count')
c = c.reset_index()
top_50 = c.head(60)
fig = px.bar(top_50, x='Model', y='Count', color='Make',
title='Top 50 Make-Model Combinations',
labels={'Model': 'Car Model', 'Count': 'Frequency'},
hover_data=['Make'])
fig.update_layout(xaxis_tickangle=90)
fig.show()
make_counts = dt['Make'].value_counts().reset_index()
make_counts.columns = ['Make', 'Count']
top_makes = make_counts.head(15)
fig_pie = px.pie(top_makes,
names='Make',
values='Count',
title='Top mode EV Makes by Frequency',
labels={'Make': 'Car Make', 'Count': 'Frequency'})
fig_pie.show()
ev_count_by_state = dt.groupby('State').size().reset_index(name='Number_of_EV_Vehicles')
ev_count_by_state
fig = px.choropleth(ev_count_by_state,
locations='State',
locationmode="USA-states",
color='Number_of_EV_Vehicles',
scope="usa",
color_continuous_scale="Viridis",
labels={'Number_of_EV_Vehicles': 'EV Vehicles'},
title='Number of EV Vehicles by State')
fig.show()
dt['Longitude'] = dt['Vehicle Location'].apply(lambda loc: float(loc.split()[1][1:]))
dt['Latitude'] = dt['Vehicle Location'].apply(lambda loc: float(loc.split()[2][:-1]))
location_counts = dt.groupby(['Latitude', 'Longitude', 'Postal Code', 'County', "State"]).size().reset_index(name='EV Count')
fig_scatter_map = px.scatter_mapbox(location_counts,
lat='Latitude',
lon='Longitude',
color='EV Count',
size='EV Count',
mapbox_style='carto-positron',
zoom=3,
center={'lat': 37.0902, 'lon': -95.7129},
title='Map of Electric Vehicle Locations')
fig_scatter_map.show()
ev_make_by_year = dt.groupby(['Make', 'Model Year']).size().reset_index(name='Number of Vehicles')
ev_make_by_year.head()
| Make | Model Year | Number of Vehicles | |
|---|---|---|---|
| 0 | AUDI | 2016 | 214 |
| 1 | AUDI | 2017 | 185 |
| 2 | AUDI | 2018 | 173 |
| 3 | AUDI | 2019 | 387 |
| 4 | AUDI | 2020 | 224 |
fig = px.bar(ev_make_by_year,
y='Make',
x='Number of Vehicles',
color='Make',
animation_frame='Model Year',
orientation='h',
title='EV Makes and their Count Over the Years',
labels={'Number of Vehicles': 'Number of EV Vehicles'},
range_x=[0, 3000]
)
fig.update_traces(texttemplate='%{x}',
textposition='outside',
textfont_size=16)
fig.update_layout(
xaxis=dict(showgrid=True, gridcolor='LightGray'),
yaxis_title='EV Makes',
xaxis_title='Number of EV Vehicles',
showlegend=False,
title_x=0.5,
title_font=dict(size=20),
margin=dict(l=50, r=50, t=50, b=50),
width=800,
height=600)
fig.show()